import hashlib
import json

import scrapy
from redis import StrictRedis


class PenpaiSpider(scrapy.Spider):
    """Crawl news listing pages 1-5 from thepaper.cn (The Paper).

    Yields one item dict per article with keys: type, title, href, img_src.
    """

    name = 'penpai'
    allowed_domains = ['www.thepaper.cn']
    start_urls = ['https://www.thepaper.cn/load_index.jsp?nodeids=25600&topCids=&pageidx=1']

    # URL template for the paginated listing endpoint; %d is the page index.
    url = "https://www.thepaper.cn/load_index.jsp?nodeids=25600&topCids=&pageidx=%d"
    # Next page index to request (start_urls already covers page 1).
    page_number = 2

    def parse(self, response):
        """Extract article entries from one listing page and schedule the next.

        Yields:
            dict: {"type", "title", "href", "img_src"} for each news entry.
            scrapy.Request: follow-up request while ``page_number`` <= 5.
        """
        news_list = response.xpath("//div[@class='news_li']")
        for node in news_list:
            title = node.xpath("./h2/a/text()").extract_first()
            # extract_first() returns None when the node is missing; guard the
            # concatenation — 'str' + None raises TypeError and kills the crawl.
            rel_href = node.xpath("./h2/a/@href").extract_first()
            href = 'https://www.thepaper.cn/' + rel_href if rel_href else None
            img_src = node.xpath("./div[@class='news_tu']/a/img/@src").extract_first()

            yield {
                "type": "news",
                "title": title,
                "href": href,
                "img_src": img_src,
            }

        # Paginate through pages 2..5. Build the URL *before* incrementing:
        # the original incremented first and so requested pages 3, 4, 5, 6 —
        # page 2 was never fetched. (The redundant format() wrapper around the
        # already-formatted string is also dropped.)
        if self.page_number <= 5:
            new_url = self.url % self.page_number
            self.page_number += 1
            # Manually issue a GET request for the next listing page.
            yield scrapy.Request(new_url, callback=self.parse)